#delimit;
set more off;

/* Close any log that is still open (for example, one left behind by an earlier
   run of this do file that stopped before reaching log close).  Without this,
   the log using command below would stop with a "log file already open" error.
   capture suppresses the error raised when no log is open.                      */
capture log close;

** Write the log as plain text;
set logtype text;

log using /home/dnc2101/Accidental_Deaths/Log_Files/create_age_variables_Part_2.log, replace;



/************************************************************************************
*
* This do file takes the raw Census data on the number of persons in different 
* demographic groups in each state and year, cleans it, and generates the state-year
* level demographic variables that are used in making the denominators for the 
* dependent variables used to examine the effects of tort reforms on accidental death 
* rates within specific age groups - i.e., the denominators for the dependent variables in 
* Table 6 of the paper  
* 
* Program by Dan Carvell, written between Summer 2007 and Spring 2010.  
*
************************************************************************************/




/* The raw data from the Census comes in 3 separate sets of years - 1981 through 1989, 1990 through 1999, and 2000 through 2006    */
/* The format of the Census data changes between these three sets of years                                                         */
/* Therefore, this do file has to manipulate each set of years separately, and then append the 3 sets of years of data together    */

/* The raw Census data for the years 1981-1989 for this do file come from http://www.census.gov/popest/archives/1980s/80s_st_age_sex.html          */
/* The raw Census data for the years 1990-1999 for this do file come from http://www.census.gov/popest/archives/1990s/nat_sex_race_hispanic.html   */
/* The raw Census data for the years 2000-2006 for this do file come from http://www.census.gov/popest/states/asrh/SC-EST2006-04.html              */



** First I'm generating the variables I need in the 1981 through 1989 data;

/* NOTE(review): this input file is read from under /home2 while every file this
   do file writes goes under /home - confirm that both locations are intended.   */
use "/home2/dnc2101/Accidental_Deaths/Raw_Data/age_gender_1981_1989.dta", clear;


** This dataset has statefip in one column, age as a variable in one column, and then the values taken in different years in different columns;
** The data will need to be reshaped in order to generate the variables I need;


** First age group to look at - persons age 0 through 5;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if age<=5;

/* Add up the both-sexes counts over the remaining ages within each state.
   collapse keeps only statefip and the variables listed here, so the male and
   female columns do not have to be dropped beforehand.                          */
collapse (sum)
    both_sexes1981 both_sexes1982 both_sexes1983
    both_sexes1984 both_sexes1985 both_sexes1986
    both_sexes1987 both_sexes1988 both_sexes1989,
    by(statefip);

** Reshape from one column per year to one row per state and year;
reshape long both_sexes, i(statefip) j(year);

sort statefip year;

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_0_to_5 = both_sexes;
drop both_sexes;

order statefip year ages_0_to_5;


** now save the data, and move on to the next demographic group;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_81_89.dta", replace;









** next demographic group I examine - adults aged 18 to 64 (1981 through 1989 data);

/* NOTE(review): this input file is read from under /home2 while every file this
   do file writes goes under /home - confirm that both locations are intended.   */
use "/home2/dnc2101/Accidental_Deaths/Raw_Data/age_gender_1981_1989.dta", clear;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if inrange(age, 18, 64);

/* Add up the both-sexes counts over the remaining ages within each state.
   collapse keeps only statefip and the variables listed here, so the male and
   female columns do not have to be dropped beforehand.                          */
collapse (sum)
    both_sexes1981 both_sexes1982 both_sexes1983
    both_sexes1984 both_sexes1985 both_sexes1986
    both_sexes1987 both_sexes1988 both_sexes1989,
    by(statefip);

** Reshape from one column per year to one row per state and year;
reshape long both_sexes, i(statefip) j(year);

sort statefip year;

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_18_to_64 = both_sexes;
drop both_sexes;

order statefip year ages_18_to_64;


** now save the data, and move on to the next demographic group;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_81_89.dta", replace;








** next demographic group I examine - minors between 6 and 17 in age (1981 through 1989 data);

/* NOTE(review): this input file is read from under /home2 while every file this
   do file writes goes under /home - confirm that both locations are intended.   */
use "/home2/dnc2101/Accidental_Deaths/Raw_Data/age_gender_1981_1989.dta", clear;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if inrange(age, 6, 17);

/* Add up the both-sexes counts over the remaining ages within each state.
   collapse keeps only statefip and the variables listed here, so the male and
   female columns do not have to be dropped beforehand.                          */
collapse (sum)
    both_sexes1981 both_sexes1982 both_sexes1983
    both_sexes1984 both_sexes1985 both_sexes1986
    both_sexes1987 both_sexes1988 both_sexes1989,
    by(statefip);

** Reshape from one column per year to one row per state and year;
reshape long both_sexes, i(statefip) j(year);

sort statefip year;

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_6_to_17 = both_sexes;
drop both_sexes;

order statefip year ages_6_to_17;


** now save the data, and move on to the next demographic group;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_81_89.dta", replace;








** next demographic group - persons age 65 and above (1981 through 1989 data);

/* NOTE(review): this input file is read from under /home2 while every file this
   do file writes goes under /home - confirm that both locations are intended.   */
use "/home2/dnc2101/Accidental_Deaths/Raw_Data/age_gender_1981_1989.dta", clear;

/* Keep only the ages in this group.  Stata orders a missing value above every
   number, so this condition is also true for a row whose age is missing and
   such a row would be counted here.
   NOTE(review): confirm whether the raw file contains missing ages and, if so,
   whether they are meant to belong to this group.                               */
keep if age>=65;

/* Add up the both-sexes counts over the remaining ages within each state.
   collapse keeps only statefip and the variables listed here, so the male and
   female columns do not have to be dropped beforehand.                          */
collapse (sum)
    both_sexes1981 both_sexes1982 both_sexes1983
    both_sexes1984 both_sexes1985 both_sexes1986
    both_sexes1987 both_sexes1988 both_sexes1989,
    by(statefip);

** Reshape from one column per year to one row per state and year;
reshape long both_sexes, i(statefip) j(year);

sort statefip year;

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double age_65_or_above = both_sexes;
drop both_sexes;

order statefip year age_65_or_above;


** now save the data, and move on to the next set of years;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_and_above_81_89.dta", replace;



















/* Now, I generate the variables I need in the Census data that covers the years 1990 through 1999      */



** First age group to look at - persons age 0 through 5;
** The variable gets the same name as the matching variable built from the 1981-1989 data;

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

** Keep only the ages in this group (rows with a missing Age are excluded as well);
keep if Age<=5;

/* Row total: the sum of the sixteen population cell variables
   (four race prefixes x male/female x the _nh and _hspo suffixes).              */
gen ages_0_to_5 =
      white_male_nh   + white_female_nh
    + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh
    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo
    + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo
    + api_male_hspo   + api_female_hspo;

/* Add the row totals up to one observation per state and year.  collapse keeps
   only statefip, year and the summed variable, so the cell variables are
   discarded here without a separate drop.                                       */
collapse (sum) ages_0_to_5, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_90_99.dta", replace;




** next demographic group I examine - adults aged 18 to 64 (1990 through 1999 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

** Keep only the ages in this group (rows with a missing Age are excluded as well);
keep if inrange(Age, 18, 64);

/* Row total: the sum of the sixteen population cell variables
   (four race prefixes x male/female x the _nh and _hspo suffixes).              */
gen ages_18_to_64 =
      white_male_nh   + white_female_nh
    + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh
    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo
    + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo
    + api_male_hspo   + api_female_hspo;

/* Add the row totals up to one observation per state and year.  collapse keeps
   only statefip, year and the summed variable, so the cell variables are
   discarded here without a separate drop.                                       */
collapse (sum) ages_18_to_64, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_90_99.dta", replace;



** next demographic group I examine - minors between 6 and 17 in age (1990 through 1999 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

** Keep only the ages in this group (rows with a missing Age are excluded as well);
keep if inrange(Age, 6, 17);

/* Row total: the sum of the sixteen population cell variables
   (four race prefixes x male/female x the _nh and _hspo suffixes).              */
gen ages_6_to_17 =
      white_male_nh   + white_female_nh
    + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh
    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo
    + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo
    + api_male_hspo   + api_female_hspo;

/* Add the row totals up to one observation per state and year.  collapse keeps
   only statefip, year and the summed variable, so the cell variables are
   discarded here without a separate drop.                                       */
collapse (sum) ages_6_to_17, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_90_99.dta", replace;







** next demographic group - persons age 65 and above (1990 through 1999 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

/* Keep only the ages in this group.  Stata orders a missing value above every
   number, so a row with a missing Age also satisfies this condition and stays.
   NOTE(review): confirm whether the raw file contains missing ages.             */
keep if Age>=65;

/* Row total: the sum of the sixteen population cell variables
   (four race prefixes x male/female x the _nh and _hspo suffixes).              */
gen age_65_or_above =
      white_male_nh   + white_female_nh
    + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh
    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo
    + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo
    + api_male_hspo   + api_female_hspo;

/* Add the row totals up to one observation per state and year.  collapse keeps
   only statefip, year and the summed variable, so the cell variables are
   discarded here without a separate drop.                                       */
collapse (sum) age_65_or_above, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_90_99.dta", replace;















 
/* Now I generate these variables in the data covering the years 2000 to 2006                                  */
/* The data for 2000-2006 comes in two separate files - one with data from Alabama through Missouri,           */
/* the other with data from Montana through Wyoming.  These datasets need to be reshaped from wide into        */
/* long format, and then appended together                                                                     */
/* A previous do file, create_age_race_gender.do, already created that reshaped and appended dataset           */
/* that has the Census data from 2000 through 2005, so all I need to do here is open that particular dataset   */


** First age group to look at - persons age 0 through 5;

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if age<=5;

** Add the population estimates up to one observation per state and year;
collapse (sum) Popestimate, by(state year);

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_0_to_5 = Popestimate;
drop Popestimate;

** Carry the state identifier under the name statefip, the name used in the 1981-1989 and 1990-1999 files;
gen statefip=state;
drop state;

order statefip year ages_0_to_5;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_00_05.dta", replace;




** next demographic group I examine - adults aged 18 to 64 (2000 through 2005 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if inrange(age, 18, 64);

** Add the population estimates up to one observation per state and year;
collapse (sum) Popestimate, by(state year);

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_18_to_64 = Popestimate;
drop Popestimate;

** Carry the state identifier under the name statefip, the name used in the 1981-1989 and 1990-1999 files;
gen statefip=state;
drop state;

order statefip year ages_18_to_64;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_00_05.dta", replace;


** next demographic group I examine - minors between 6 and 17 in age (2000 through 2005 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

** Keep only the ages in this group (rows with a missing age are excluded as well);
keep if inrange(age, 6, 17);

** Add the population estimates up to one observation per state and year;
collapse (sum) Popestimate, by(state year);

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double ages_6_to_17 = Popestimate;
drop Popestimate;

** Carry the state identifier under the name statefip, the name used in the 1981-1989 and 1990-1999 files;
gen statefip=state;
drop state;

order statefip year ages_6_to_17;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_00_05.dta", replace;






** next demographic group - persons age 65 and above (2000 through 2005 data);

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Keep only the ages in this group.  Stata orders a missing value above every
   number, so a row with a missing age also satisfies this condition and stays.
   NOTE(review): confirm whether the input file contains missing ages.           */
keep if age>=65;

** Add the population estimates up to one observation per state and year;
collapse (sum) Popestimate, by(state year);

/* Store the count as a double.  A plain gen would create a float, and a float
   cannot represent every integer above 16,777,216 exactly, so a large
   population total could be silently rounded.                                   */
gen double age_65_or_above = Popestimate;
drop Popestimate;

** Carry the state identifier under the name statefip, the name used in the 1981-1989 and 1990-1999 files;
gen statefip=state;
drop state;

order statefip year age_65_or_above;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_00_05.dta", replace;





















** Now, append all three sets of years of data together;

** First, people age 0 to 5;

** Start from the 1981-1989 file, then stack the two later files beneath it in chronological order;
use "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_81_89.dta", clear;

foreach later_file in
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_90_99.dta"
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_00_05.dta" {;
    append using "`later_file'";
};

sort statefip year;

** Write the combined panel both as a Stata dataset and as an Excel-readable XML file;
save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_81_05.dta", replace;

xmlsave "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_5_years_81_05.xml", doctype(excel) replace;




** Now people aged 6 to 17;

** Start from the 1981-1989 file, then stack the two later files beneath it in chronological order;
use "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_81_89.dta", clear;

foreach later_file in
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_90_99.dta"
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_00_05.dta" {;
    append using "`later_file'";
};

sort statefip year;

** Write the combined panel both as a Stata dataset and as an Excel-readable XML file;
save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_81_05.dta", replace;

xmlsave "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_6_to_17_years_81_05.xml", doctype(excel) replace;



** Now people aged 18 to 64;

** Start from the 1981-1989 file, then stack the two later files beneath it in chronological order;
use "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_81_89.dta", clear;

foreach later_file in
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_90_99.dta"
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_00_05.dta" {;
    append using "`later_file'";
};

sort statefip year;

** Write the combined panel both as a Stata dataset and as an Excel-readable XML file;
save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_81_05.dta", replace;

xmlsave "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_18_to_64_years_81_05.xml", doctype(excel) replace;




** Now people aged 65 and above;

** Start from the 1981-1989 file, then stack the two later files beneath it in chronological order;
use "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_and_above_81_89.dta", clear;

foreach later_file in
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_90_99.dta"
    "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_00_05.dta" {;
    append using "`later_file'";
};

sort statefip year;

** Write the combined panel both as a Stata dataset and as an Excel-readable XML file;
save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_81_05.dta", replace;

xmlsave "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_81_05.xml", doctype(excel) replace;





** Close the log file that was opened with log using at the top of this do file;
log close;




